

## Eunji Kim and Cindy Kam
## Othering in Everyday Life: Anti-Chinese Bias in the COVID-19 Pandemic
## Public Opinion Quarterly  
## Codes to replicate all figures and tables in the appendix and supplementary materials 


# Clear every (non-hidden) object from the current workspace before loading
# the replication data. This removes objects only: attached packages and
# options() are left as they are, so a fresh R session is still the safest
# way to run the script.
rm(list = ls())

## load libraries 

library(tm)
library(tidyverse)
library(ggplot2)
library(dotwhisker)
library(glue)
library(stringr)
library(xts)
library(lubridate)
library(plyr)
library(ggpubr)
library(dplyr)
library(fixest)
library(ggeffects)
library(extrafont)
library(lfe)
library(estimatr)
library(ggeffects)
library(ggeasy)


## load needed data before running the analyses
# The data objects used below (all19, all1, stanfordtv, ns_weekly1, ...) are
# not created anywhere in this script, so they are presumably supplied by this
# file -- it must be present in the working directory.

load(file = "poq_kimkam_appendix.RData")

##################################################################
## Appendix B: Simulated Distribution of Ratings
##################################################################

## Table B1: Percentage Point Differences Between 2019 Baseline and Simulated Ratings


# Simulation: what does a shift in the average rating mean once ratings are
# displayed in 0.5 steps?

# Split the 2019 reviews by restaurant type (1 = Chinese, 0 = American).
chinese_pre_only <- dplyr::filter(all19, chinese_vs_american == 1)
american_pre_only <- dplyr::filter(all19, chinese_vs_american == 0)

# Chinese restaurants: mean 2019 rating per business.
# dplyr:: is spelled out because plyr is attached after dplyr and masks
# summarise().
avg_chinese <- chinese_pre_only %>%
  dplyr::group_by(business.id) %>%
  dplyr::summarise(avgrating = mean(rating), .groups = "drop")

# American restaurants: mean 2019 rating per business.
avg_american <- american_pre_only %>%
  dplyr::group_by(business.id) %>%
  dplyr::summarise(avgrating = mean(rating), .groups = "drop")

# 2019 front-facing ratings: round each restaurant's average to the nearest
# 0.5 (round_any() rounds to the nearest multiple by default, not upwards).

avg_chinese$rounded19 <- round_any(avg_chinese$avgrating, 0.5)
avg_american$rounded19 <- round_any(avg_american$avgrating, 0.5)

# Chinese restaurants: shift every 2019 average by -0.044 and round again.
# NOTE(review): -0.044 is presumably an estimate taken from the main
# analysis; its source is not shown in this script.
avg_chinese$avg_chinese_simulated <- avg_chinese$avgrating - 0.044
avg_chinese$rounded_simulated <- round_any(avg_chinese$avg_chinese_simulated, 0.5)

# Long format with columns (group, rating), one row per restaurant and
# scenario. The baseline label is the character "2019" -- the same type used
# for the American restaurants and by the `group == '2019'` filters further
# down -- instead of the number 2019, which only worked because rbind()
# coerced it to character.
chinese <- data.frame(
  group = "2019",
  rating = avg_chinese$rounded19,
  stringsAsFactors = FALSE
)

chinese2 <- data.frame(
  group = "Simulated",
  rating = avg_chinese$rounded_simulated,
  stringsAsFactors = FALSE
)

chinese_combined <- rbind(chinese, chinese2)


# American restaurants: shift every 2019 average by +0.087 and round to the
# nearest 0.5 again.
# NOTE(review): +0.087 is presumably an estimate taken from the main
# analysis; its source is not shown in this script.

avg_american$avg_american_simulated <- avg_american$avgrating + 0.087
avg_american$rounded_simulated <- round_any(avg_american$avg_american_simulated, 0.5)

# Long format with columns (group, rating): baseline rows first, simulated
# rows second.
american <- data.frame(
  group = "2019",
  rating = avg_american$rounded19,
  stringsAsFactors = FALSE
)

american2 <- data.frame(
  group = "Simulated",
  rating = avg_american$rounded_simulated,
  stringsAsFactors = FALSE
)

american_combined <- rbind(american, american2)

# creating proportion table
#
# For each restaurant type, the 2019 and simulated ratings are tabulated over
# the SAME set of rating levels. The percentage-point differences below are
# computed by position, so tabulating each scenario on its own would silently
# misalign (or recycle) the subtraction whenever a rating level occurs in only
# one of the two scenarios. When both scenarios contain the same levels the
# results are identical to separate tabulation.

# Chinese

chinese_levels <- sort(unique(chinese_combined$rating))

# Counts per front-facing rating
c1 <- table(factor(chinese_combined$rating[chinese_combined$group == "2019"],
                   levels = chinese_levels))
c2 <- table(factor(chinese_combined$rating[chinese_combined$group == "Simulated"],
                   levels = chinese_levels))

# Shares in percent (columns Var1 = rating level, Freq = percent)
pc1 <- as.data.frame(prop.table(c1) * 100)
pc2 <- as.data.frame(prop.table(c2) * 100)

pc1$group <- "2019"
pc2$group <- "Simulated"

# adding N
pc1$N <- as.data.frame(c1)$Freq
pc2$N <- as.data.frame(c2)$Freq

pc3 <- rbind(pc1, pc2)
pc3$Freq <- round(pc3$Freq, 2)
pc3$Prop <- pc3$Freq

pc3$group[pc3$group == "2019"] <- "Chinese 2019"
pc3$group[pc3$group == "Simulated"] <- "Chinese Simulated"

# Percentage-point change per rating level (simulated minus 2019), computed
# from the unrounded shares
pc_diff <- data.frame(pc_diff = pc2$Freq - pc1$Freq, rating = pc1$Var1)

# American

american_levels <- sort(unique(american_combined$rating))

# Counts per front-facing rating
a1 <- table(factor(american_combined$rating[american_combined$group == "2019"],
                   levels = american_levels))
a2 <- table(factor(american_combined$rating[american_combined$group == "Simulated"],
                   levels = american_levels))

# Shares in percent
ac1 <- as.data.frame(prop.table(a1) * 100)
ac2 <- as.data.frame(prop.table(a2) * 100)

# Percentage-point change per rating level (simulated minus 2019)
ac_diff <- data.frame(ac_diff = ac2$Freq - ac1$Freq, rating = ac1$Var1)

ac1$group <- "2019"
ac2$group <- "Simulated"

# adding N
ac1$N <- as.data.frame(a1)$Freq
ac2$N <- as.data.frame(a2)$Freq

ac3 <- rbind(ac1, ac2)
ac3$Freq <- round(ac3$Freq, 2)
ac3$Prop <- ac3$Freq
ac3$group[ac3$group == "2019"] <- "American 2019"
ac3$group[ac3$group == "Simulated"] <- "American Simulated"

# combined table: rating level (Var1), group, N and share in percent (Prop)

t2 <- rbind(pc3, ac3)
t2$Freq <- NULL
# Explicit print() so the table is also shown when the script is source()d.
print(t2)


##  Figure B1: Percentage Point Differences Between 2019 Baseline and Simulated Ratings

# Dodged bar chart of `diff` by `rating`, one bar per `group`, with the value
# printed on each bar.
# NOTE(review): `diff2` is not created in this script; it is presumably
# supplied by poq_kimkam_appendix.RData with columns rating, diff and group --
# confirm.
fig_b1 <- ggplot(data = diff2, aes(x = rating, y = diff, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(), alpha = 0.7) +
  geom_text(aes(label = diff, fontface = 2), vjust = 1.6, color = "black",
            position = position_dodge(0.9), size = 3) +
  scale_fill_manual(values = c("grey60", "#8a0002")) +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  ylab("Difference in %") +
  xlab("Front-Facing Yelp Rating") +
  theme(legend.position = "top") +
  ylim(-2.5, 4)

# NOTE(review): 7 x 4 inches at res = 2000 is a 14000 x 8000 pixel image.
jpeg("Kim and Kam_Appendix B1.jpeg", units = "in", width = 7, height = 4, res = 2000)

# ggplot objects are drawn only when printed. Auto-printing happens at the
# top level of an interactive session but not under source(), where a bare
# plot expression would leave the JPEG empty -- hence the explicit print().
print(fig_b1)

dev.off()


##################################################################
## Appendix C: Effects on Other Asian Restaurants
##################################################################

## Table C1: The Impact of the Pandemic on Restaurant Ratings - Other Asian Restaurants

# One model per cuisine, all estimated on `all1`: rating regressed on the
# cuisine-vs-American indicator interacted with prepost, with city and
# elitestatus fixed effects and standard errors clustered by business.id.
j <- feols(
  rating ~ japanese_vs_american * prepost | city + elitestatus,
  data = all1, cluster = ~business.id
)
k <- feols(
  rating ~ korean_vs_american * prepost | city + elitestatus,
  data = all1, cluster = ~business.id
)
v <- feols(
  rating ~ vietnam_vs_american * prepost | city + elitestatus,
  data = all1, cluster = ~business.id
)
t <- feols(
  rating ~ thai_vs_american * prepost | city + elitestatus,
  data = all1, cluster = ~business.id
)
i <- feols(
  rating ~ indian_vs_american * prepost | city + elitestatus,
  data = all1, cluster = ~business.id
)

# Write the five models side by side as a LaTeX table, overwriting any
# existing file.
etable(
  j, k, v, t, i,
  title = "The Impact of Pandemic on Other Asian Restaurants",
  label = "tab:placebo",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "Appendix Table C1.tex",
  replace = TRUE
)



##################################################################
## Supplementary Material A: Media and Public Opinion Context 
##################################################################

## Figure A1: Cable TV News References to Wuhan/Chinese Virus

# Line chart of `minutes` against `Time` from `stanfordtv`. The x-axis is
# fixed to 2019-12-01 through 2020-12-30 with one labelled tick per month;
# the y-axis has breaks every 0.2 between 0 and 1.
s <- ggplot(stanfordtv, aes(x = Time, y = minutes)) +
  geom_line(color = "#8a0002") +
  scale_x_date(
    expand = expansion(mult = c(0, 0)),
    limits = c(as.Date("2019-12-01"), as.Date("2020-12-30")),
    breaks = "1 month",
    date_labels = "%b %Y"
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.014)),
    breaks = seq(0, 1, by = 0.2)
  ) +
  xlab("") +
  ylab("Minutes per day") +
  theme_classic() +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_text(size = 10),
    axis.line = element_line(size = 0.3, colour = "black")
  )

# On-screen preview when the script is run interactively.
s

jpeg("stanfordtv.jpeg", units = "in", width = 9, height = 5, res = 400)
# Explicit print(): under source() a bare `s` is not auto-printed and the
# JPEG would be written without the plot.
print(s)
dev.off()


## Figure A2: Trends in Unfavorable Attitudes Toward Asians

# Points of `unfav` by `wn` from `ns_weekly1`, with a separate loess smoother
# for each `prepost` group, a dotted vertical line at 2020-03-13 and a text
# annotation for the state-of-emergency declaration.
p_asian <- ggplot(ns_weekly1, aes(wn, unfav, group = prepost)) +
  geom_point(color = "#2E383D") +
  stat_smooth(aes(y = unfav, x = wn), method = loess,
              fill = "#8a0002", color = "#8a0002") +
  ggplot2::annotate("text", x = ymd("2020-06-06"), y = 25, fontface = 2, size = 3,
                    label = "President Trump Declares \n State of Emergency for COVID-19") +
  geom_vline(xintercept = as.numeric(ymd("2020-03-13")), linetype = "dotted",
             color = "black", size = 0.5) +
  scale_x_date(breaks = scales::breaks_pretty(15)) +
  ylim(5, 25) +
  xlab("Date") +
  ylab("% with unfavorable view toward Asians") +
  labs(caption = "Source: Democracy Fund + UCLA Nationscape surveys") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.position = c(0.93, 0.93)
  )

jpeg("nationscape_asianonly.jpeg", units = "in", width = 8, height = 5, res = 300)

# Explicit print(): under source() a bare `p_asian` is not auto-printed and
# the JPEG would be written without the plot.
print(p_asian)

dev.off()

##################################################################
## Supplementary Material B: Robustness Checks and Placebo Tests
##################################################################


## Table B1: The Impact of the Pandemic on Restaurant Ratings - All Chinese-Related Keywords

# One specification, re-estimated on six samples (all1 ... all6): rating on
# chineseall_vs_american x prepost plus elitestatus, city fixed effects,
# standard errors clustered by business.id.
fml_sm_b1 <- rating ~ chineseall_vs_american * prepost + elitestatus | city

# Jan - April 2020
c1 <- feols(fml_sm_b1, data = all1, cluster = ~business.id)
# Jan 2020 - May 2020
c2 <- feols(fml_sm_b1, data = all2, cluster = ~business.id)
# Jan - June 2020
c3 <- feols(fml_sm_b1, data = all3, cluster = ~business.id)
# Jan - July 2020
c4 <- feols(fml_sm_b1, data = all4, cluster = ~business.id)
# Jan - Aug 2020
c5 <- feols(fml_sm_b1, data = all5, cluster = ~business.id)
# Jan - Sep 2020
c6 <- feols(fml_sm_b1, data = all6, cluster = ~business.id)
summary(c6)

# NOTE(review): the title below looks like it is missing the word
# "Restaurants" (note the double space); left unchanged so the generated
# file stays the same.
etable(
  c1, c2, c3, c4, c5, c6,
  title = "The Impact of Pandemic on Chinese  (All Chinese-Related Keywords)",
  label = "tab:yelp",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "SM Table B1.tex",
  replace = TRUE
)


## Table B2: The Impact of the Pandemic on Restaurant Ratings - Using Only Those with Pre-Post Data

# One specification, re-estimated on six samples (all1s ... all6s): rating on
# chinese_vs_american x prepost plus elitestatus, city fixed effects,
# standard errors clustered by business.id.
fml_sm_b2 <- rating ~ chinese_vs_american * prepost + elitestatus | city

# Jan 2020 - April 2020
c1 <- feols(fml_sm_b2, data = all1s, cluster = ~business.id)

# Jan 2020 - May 2020
c2 <- feols(fml_sm_b2, data = all2s, cluster = ~business.id)

# Jan 2020 - June 2020
c3 <- feols(fml_sm_b2, data = all3s, cluster = ~business.id)

# Jan 2020 - July 2020
c4 <- feols(fml_sm_b2, data = all4s, cluster = ~business.id)

# Jan 2020 - August 2020
c5 <- feols(fml_sm_b2, data = all5s, cluster = ~business.id)

# Jan 2020 - Sep 2020
c6 <- feols(fml_sm_b2, data = all6s, cluster = ~business.id)

# NOTE(review): this file name uses an underscore ("SM_Table") while the
# other supplementary tables use a space ("SM Table"); left unchanged.
etable(
  c1, c2, c3, c4, c5, c6,
  title = "The Impact of Pandemic on Chinese Restaurants",
  label = "tab:yelp",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "SM_Table B2.tex",
  replace = TRUE
)


## Table B3: Placebo Test 1: Using 2019 March 13 as Cutoff

# Placebo specification, re-estimated on four 2019 samples (all1p ... all4p):
# rating on chinese_vs_american x prepost_2019placebo plus elitestatus, city
# fixed effects, standard errors clustered by business.id.
fml_sm_b3 <- rating ~ chinese_vs_american * prepost_2019placebo + elitestatus | city

# Jan 2019 - April 2019
p1 <- feols(fml_sm_b3, data = all1p, cluster = ~business.id)

# Jan 2019 - May 2019
p2 <- feols(fml_sm_b3, data = all2p, cluster = ~business.id)

# Jan 2019 - June 2019
p3 <- feols(fml_sm_b3, data = all3p, cluster = ~business.id)

# Jan 2019 - July 2019
p4 <- feols(fml_sm_b3, data = all4p, cluster = ~business.id)

etable(
  p1, p2, p3, p4,
  title = "Using 2019 March 13 as Placebo Date",
  label = "tab:yelp",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "SM Table B3.tex",
  replace = TRUE
)


## Table B4: Placebo Test 2: Difference-in-Differences among Chinese Restaurants

# rating on univcutoff x y2020 with city and elitestatus fixed effects,
# standard errors clustered by business.id. Models c1-c3 use rows flagged
# chinese_vs_american == 1, models c4-c6 rows flagged
# chineseall_vs_american == 1; within each set the sample is further limited
# by the twoyear1 / twoyear2 / twoyear3 flags.
fml_sm_b4 <- rating ~ univcutoff * y2020 | city + elitestatus

is_chinese <- all_s_twoyear$chinese_vs_american == 1
is_chinese_all <- all_s_twoyear$chineseall_vs_american == 1

c1 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear1 == 1 & is_chinese, ])

c2 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear2 == 1 & is_chinese, ])

c3 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear3 == 1 & is_chinese, ])

c4 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear1 == 1 & is_chinese_all, ])

c5 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear2 == 1 & is_chinese_all, ])

c6 <- feols(fml_sm_b4, cluster = ~business.id,
            data = all_s_twoyear[all_s_twoyear$twoyear3 == 1 & is_chinese_all, ])

etable(
  c1, c2, c3, c4, c5, c6,
  title = "Within-Chinese Restaurants Diff-in-Diff",
  label = "tab:yelp",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "SM Table B4.tex",
  replace = TRUE
)



## Table B5: The Impact of the Pandemic on Restaurant Ratings with March 1 as a Cutoff

# One specification, re-estimated on six samples (all1 ... all6): rating on
# chinese_vs_american x prepost_march1 with city and elitestatus fixed
# effects, standard errors clustered by business.id.
fml_sm_b5 <- rating ~ chinese_vs_american * prepost_march1 | city + elitestatus

# Jan 2020 - April 2020
c1 <- feols(fml_sm_b5, data = all1, cluster = ~business.id)

# Jan 2020 - May 2020
c2 <- feols(fml_sm_b5, data = all2, cluster = ~business.id)

# Jan 2020 - June 2020
c3 <- feols(fml_sm_b5, data = all3, cluster = ~business.id)

# Jan 2020 - July 2020
c4 <- feols(fml_sm_b5, data = all4, cluster = ~business.id)

# Jan 2020 - August 2020
c5 <- feols(fml_sm_b5, data = all5, cluster = ~business.id)

# Jan 2020 - September 2020
c6 <- feols(fml_sm_b5, data = all6, cluster = ~business.id)

etable(
  c1, c2, c3, c4, c5, c6,
  title = "The Impact of Pandemic on Chinese Restaurants (All)",
  label = "tab:yelp",
  depvar = FALSE,
  digits = 3,
  tex = TRUE,
  file = "SM Table B5.tex",
  replace = TRUE
)


################################################################  
## Supplementary Material C: Addressing Generalizability
################################################################  


## Table C1. Anti-Asian Attitudes in Urban, Suburban, and Rural Counties

# 2013 rural-urban continuum codes 

# Metropolitan Counties*			

#  Code	Description		
#1	Counties in metro areas of 1 million population or more		
#2	Counties in metro areas of 250,000 to 1 million population		
#3	Counties in metro areas of fewer than 250,000 population		

# Nonmetropolitan Counties			

#4	Urban population of 20,000 or more, adjacent to a metro area		
#5	Urban population of 20,000 or more, not adjacent to a metro area		
#6	Urban population of 2,500 to 19,999, adjacent to a metro area		
#7	Urban population of 2,500 to 19,999, not adjacent to a metro area		
#8	Completely rural or less than 2,500 urban population, adjacent to a metro area		
#9	Completely rural or less than 2,500 urban population, not adjacent to a metro area		

# Mean of asian_unfav within each RUCC_2013 code, separately for
# prepost == 0 and prepost == 1.
# NOTE(review): `n` is sum(asian_unfav), not a row count. If asian_unfav is a
# 0/1 indicator this is the number of unfavorable responses rather than the
# group size -- confirm which quantity the table is meant to report.
pre <- ns_ruralurban %>%
  filter(prepost == 0) %>%
  group_by(RUCC_2013) %>%
  dplyr::summarise(
    unfav = mean(asian_unfav, na.rm = TRUE),
    n = sum(asian_unfav, na.rm = TRUE),
    .groups = "drop"
  )

post <- ns_ruralurban %>%
  filter(prepost == 1) %>%
  group_by(RUCC_2013) %>%
  dplyr::summarise(
    unfav = mean(asian_unfav, na.rm = TRUE),
    n = sum(asian_unfav, na.rm = TRUE),
    .groups = "drop"
  )

pre <- as.data.frame(pre)
post <- as.data.frame(post)

# Align the post-period rows to the pre-period RUCC codes before subtracting.
# A purely positional `post$unfav - pre$unfav` would silently pair the wrong
# codes (or recycle) if a code were present in only one period; with match(),
# a code missing from `post` yields NA instead. When both periods contain the
# same codes the result is unchanged.
# (`diff` shadows the name of base::diff as a variable; calls to diff() still
# resolve to the function.)
diff <- post$unfav[match(pre$RUCC_2013, post$RUCC_2013)] - pre$unfav

# Explicit print() so the results are also shown when the script is source()d.
# Asian unfavorability pre-lockdown
print(pre)
# Asian unfavorability post-lockdown
print(post)
# pre-post difference in Asian unfavorability
print(diff)




